* Project root. Every input and output path in this script hangs off this global.
global g_root "C:\Users\Hongye Guo\Dropbox (Personal)\Density\Code\Production\Submission"

* Load the monthly CRSP file; the sections below reshape it into an industry-month panel.
use "${g_root}\Data\crsp_monthly_202106.dta", clear
* Filter & lead lag
{
	* Monthly date index, used as the time variable from here on.
	gen ym = mofd(date)

	* SIC clean-up via a string round trip: pad three-character codes with a
	* leading zero, blank out the codes 9999, 9990, 9910 and 0, then convert
	* back to numeric as sic4. Blanked codes become missing and are dropped below.
	tostring siccd, replace
	replace siccd = "" if strlen(siccd) == 0
	replace siccd = "0" + siccd if strlen(siccd) == 3
	replace siccd = "" if inlist(siccd, "9999", "9990", "9910", "0")
	rename siccd sic4
	destring sic4, replace

	* Apply the bare minimum filter
	keep if inlist(shrcd, 10, 11, 12, 30, 31, 32)
	drop if secstat == "Q" | secstat == "W"

	* Prices can be stored with a negative sign; use the magnitude for market value.
	replace prc = abs(prc) if prc < 0
	gen mkval = prc * shrout

	* Firm-level panel: one-month lag of market value and one-month lead of return.
	sort permno ym
	tsset permno ym
	gen mkval_l1m = L.mkval
	gen ret_f1m = F.ret

	* An observation is usable only with a market value, a next-month return and an industry code.
	drop if missing(mkval, ret_f1m, sic4)

	* Size screen: drop firms below the 10th percentile of market value among
	* observations with primexch equal to N in the same month. The breakpoint
	* is computed on those rows only and then spread to every row of the month.
	* NOTE(review): a month with no such rows gets a missing breakpoint, and
	* since any number compares below missing, that whole month is dropped.
	bysort ym: egen nyse_only_p10 = pctile(mkval) if primexch == "N", p(10)
	by ym: egen p10_mkval_nyse = max(nyse_only_p10)
	drop nyse_only_p10
	drop if mkval < p10_mkval_nyse
}

* Aggregate to industry level
{
	* Firm contribution to a value-weighted return: market value times next-month return.
	gen num = mkval * ret_f1m

	* Industry-month totals and the value-weighted next-month industry return.
	bysort ym sic4: egen ind_num = total(num)
	by ym sic4: egen ym_sic4_mkval = total(mkval)
	gen ym_sic4_ret_f1m = ind_num / ym_sic4_mkval
	drop ind_num

	* Same construction across all firms in the month: the market return.
	bysort ym: egen mkt_num = total(num)
	by ym: egen ym_mkval = total(mkval)
	gen ym_ret_f1m = mkt_num / ym_mkval
	drop mkt_num

	* Record the number of firms per industry-month, then collapse to a single
	* row per industry-month. The firm-level columns left on that row are
	* whichever firm sorted first and are not meaningful afterwards.
	bysort ym sic4: gen ym_sic4_count = _N
	by ym sic4: keep if _n == 1
}

* Industry level variables generation
{
	* Next-month industry return in excess of the market.
	gen ym_sic4_exret_f1m = ym_sic4_ret_f1m - ym_ret_f1m

	* Lagged excess returns. The suffix counts months back from ym, so l0m is
	* the excess return realised in month ym itself, which is the first lag of
	* the one-month-ahead series.
	tsset sic4 ym
	forvalues back = 0/11 {
		local shift = `back' + 1
		gen ym_sic4_exret_l`back'm = L`shift'.ym_sic4_exret_f1m
	}

	* Regression weight: industry share of total market value within the month.
	bysort ym (sic4): egen ym_all_mkval = total(ym_sic4_mkval)
	gen wgt = ym_sic4_mkval / ym_all_mkval
	drop ym_all_mkval

	* Calendar month of the return being forecast, and a flag for the first
	* month of a calendar quarter (January, April, July, October).
	gen month = month(dofm(ym + 1))
	gen am_flag = mod(month, 3) == 1

	* For q = 1 to 4, pick the excess return of the q-th most recent
	* first-month-of-quarter strictly before the forecast month. Which monthly
	* lag that is depends on where the forecast month sits in its quarter:
	* second month uses the nearest lag of the triple, third month the middle
	* one, first month the farthest one.
	forvalues q = 1/4 {
		local near = 3 * (`q' - 1)
		local mid = `near' + 1
		local far = `near' + 2
		local qvar ym_sic4_exret_prevq_l`q'q
		gen `qvar' = ym_sic4_exret_l`near'm if mod(month, 3) == 2
		replace `qvar' = ym_sic4_exret_l`mid'm if mod(month, 3) == 0
		replace `qvar' = ym_sic4_exret_l`far'm if mod(month, 3) == 1
		* Interaction with the first-month-of-quarter flag.
		gen `qvar'_xam = `qvar' * am_flag
	}

	* Sum over the four picks; missing if any of the four is missing.
	gen ym_sic4_exret_prevq_t4q = ym_sic4_exret_prevq_l1q ///
		+ ym_sic4_exret_prevq_l2q ///
		+ ym_sic4_exret_prevq_l3q ///
		+ ym_sic4_exret_prevq_l4q
	gen ym_sic4_exret_prevq_t4q_xam = ym_sic4_exret_prevq_t4q * am_flag
}
* Snapshot of the industry-month panel; each table section below reloads it.
tempfile tf_main
save `tf_main', replace

* Reset before estimation. NOTE(review): clear matrix is expected to also drop
* stored estimates, so that the sep_ wildcard at export only matches this run.
* Confirm against the Stata documentation for clear.
clear matrix
* Running column counter used to name stored estimates sep_1, sep_2, and so on.
local col_count 0

* Table 10
* Column 1-2: Connectivity
{
	use `tf_main', clear

	* Attach the connectivity measure with a two-quarter lag. The lookup file
	* keys the industry as sic, so rename around the merge. Unmatched rows from
	* the panel are kept; rows found only in the lookup file are discarded.
	gen yq = qofd(dofm(ym)) - 2
	rename sic4 sic
	merge m:1 sic yq using "$g_root/Data/sic_connect_roe", keep(1 3) nogen
	rename sic sic4

	tsset sic4 ym

	* Monthly lags of the connectivity measure.
	forvalues back = 1/12 {
		gen connect_vw_l`back'm = L`back'.connect_vw
	}

	* Average over the current month and the previous eleven, required to rest
	* on at least four non-missing months.
	local window connect_vw connect_vw_l1m-connect_vw_l11m
	egen connect_vw_t0_11m = rowmean(`window')
	egen n_nonmiss = rownonmiss(`window')
	replace connect_vw_t0_11m = . if n_nonmiss < 4
	drop n_nonmiss

	* Monthly median of the averaged measure among industries with more than 19 firms.
	bysort ym: egen p_50 = pctile(connect_vw_t0_11m) if ym_sic4_count>19, p(50)

	* Same weighted regression on each half of the split, at-or-below the
	* median first and above the median second, clustered by month.
	local spec ym_sic4_exret_f1m ym_sic4_exret_prevq_t4q am_flag ym_sic4_exret_prevq_t4q_xam
	foreach half in "connect_vw_t0_11m<=p_50" "connect_vw_t0_11m>p_50" {
		reg `spec' [weight = wgt] if ym_sic4_count>19 & `half' & !mi(connect_vw_t0_11m), vce(cluster ym)
		local ++col_count
		estimates store sep_`col_count'
	}

}

* Column 3-6: Entry cost & concentration
{
	use `tf_main', clear

	* Annual variables timing logic follows Fama & French
	gen fyear = year(dofm(ym + 7)) - 2

	* Shared regression specification and the two sides of the median split.
	local spec ym_sic4_exret_f1m ym_sic4_exret_prevq_t4q am_flag ym_sic4_exret_prevq_t4q_xam
	local below "fy_sic4_concentration3<=p_50"
	local above "fy_sic4_concentration3>p_50"

	* First pass, entry cost file.
	* NOTE(review): the split below uses fy_sic4_concentration3, so this file
	* presumably stores its measure under that name. Confirm against the data.
	merge m:1 sic4 fyear using "$g_root/Data/sic4_encost", keep(1 3) nogen

	* Median by fiscal year among industries with more than 19 firms.
	bysort fyear: egen p_50 = pctile(fy_sic4_concentration3) if ym_sic4_count>19, p(50)

	foreach side in below above {
		reg `spec' [weight = wgt] if ym_sic4_count>19 & ``side'' & !mi(fy_sic4_concentration3), vce(cluster ym)
		local ++col_count
		estimates store sep_`col_count'
	}

	* Remove the merged measure and its median so the next merge starts clean.
	drop fy_sic4_concentration* p_*

	* Second pass, concentration file, same split and regressions.
	merge m:1 sic4 fyear using "$g_root/Data/sic4_concentration", keep(1 3) nogen

	bysort fyear: egen p_50 = pctile(fy_sic4_concentration3) if ym_sic4_count>19, p(50)

	foreach side in below above {
		reg `spec' [weight = wgt] if ym_sic4_count>19 & ``side'' & !mi(fy_sic4_concentration3), vce(cluster ym)
		local ++col_count
		estimates store sep_`col_count'
	}

}

* Column 7-8: Size
{
	use `tf_main', clear

	* Monthly median of the number of firms per industry. Unlike the other
	* splits, no minimum firm count is imposed here.
	bysort ym: egen p_50_count = pctile(ym_sic4_count), p(50)

	* Same weighted regression on industries at-or-below the median count
	* first and above it second, clustered by month.
	local spec ym_sic4_exret_f1m ym_sic4_exret_prevq_t4q am_flag ym_sic4_exret_prevq_t4q_xam
	foreach half in "ym_sic4_count<=p_50_count" "ym_sic4_count>p_50_count" {
		reg `spec' [weight = wgt] if `half' & !mi(ym_sic4_count), vce(cluster ym)
		local ++col_count
		estimates store sep_`col_count'
	}

}
* Stamp the output file name with the current date and the hour and minute of
* the current time, then write every stored sep_ column to one CSV.
local l_date : display %tdCYND date(c(current_date), "DMY")
local l_time = substr(c(current_time), 1, 2) + substr(c(current_time), 4, 2)
esttab sep_* using "$g_root/Output/Split_`l_date'`l_time'.csv", ///
	star(* 0.1  ** 0.05 *** 0.01) b(3) t(2) r2 br noomit replace
